* Move to the replication folder used by all relative paths in this script.
* A failed cd is tolerated (presumably so the script can be started from
* inside the project folder on another machine - confirm), but it is now
* reported instead of being swallowed silently, together with the directory
* that the relative paths will actually be resolved against.
capture cd  "D:\Dropbox\book_welfare\replication\"
if _rc != 0 {
	display as text "note: cd to the replication folder failed; relative paths resolve against " c(pwd)
}



* ---------------------------------------------------------------------------
* Extract six book-level fields from the Goodreads books JSON dump.
*
* The dump is read as comma-delimited text, 100,000 rows per pass (chunks 0
* through 25), so each comma-separated fragment of a JSON record lands in its
* own v# column. For every fragment that carries one of the wanted keys the
* fragment is copied into the matching output variable, and the value that
* follows the key is then isolated and stripped of double quotes. Each chunk
* is saved as data\partialN.dta, where N is the chunk number.
*
* Differences from a plain substring search followed by split:
*   - a fragment is matched on the quoted key followed by colon and space,
*     not on the bare word, so a title, URL or the tail of a comma-split
*     value that merely contains e.g. publisher or title cannot overwrite
*     the real field;
*   - the value is everything after the key, so a title that itself contains
*     a colon followed by a space is no longer cut at that point, and a chunk
*     in which a field never occurs no longer aborts the run;
*   - numeric v# columns are skipped explicitly rather than by swallowing the
*     type-mismatch error.
*
* Known limitation, unchanged: because the file is read as comma-delimited,
* a value that contains a comma is still cut at its first comma.
* When a key occurs in several fragments of a row (several authors), the
* last occurrence wins, as before.
* ---------------------------------------------------------------------------

* Parallel lists: JSON key and the variable that receives its value.
local jsonkeys "publication_year author_id book_id title isbn publisher"
local outvars  "pubyr author_id book_id title isbn publisher"
local nfields : word count `outvars'

tempname tag
tempvar pos

forvalues j=0(1) 25 {

	local beg = `j'*100000 + 1
	local end = `j'*100000+100000

	import delimited  data\goodreads_books.json, delimiter(comma) rowrange(`beg':`end') clear bindquote(nobind)

	foreach x of local outvars {
		gen `x' = ""
	}

	* Scan every imported column once and keep the fragments of interest.
	foreach k of varlist v* {
		* Only string columns can hold a key/value fragment.
		capture confirm string variable `k'
		if _rc == 0 {
			forvalues i = 1/`nfields' {
				local key : word `i' of `jsonkeys'
				local x   : word `i' of `outvars'
				* Search pattern: quote, key, quote, colon, space.
				scalar `tag' = char(34) + "`key'" + char(34) + ": "
				quietly replace `x' = `k' if strpos(`k', scalar(`tag')) > 0
			}
		}
		drop `k'
	}

	* Reduce each stored fragment to the bare value that follows its key.
	forvalues i = 1/`nfields' {
		local key : word `i' of `jsonkeys'
		local x   : word `i' of `outvars'
		scalar `tag' = char(34) + "`key'" + char(34) + ": "

		* Leading and trailing blanks are removed first, as split did.
		replace `x' = trim(`x')
		gen long `pos' = strpos(`x', scalar(`tag'))
		replace `x' = substr(`x', `pos' + strlen(scalar(`tag')), .) if `pos' > 0
		replace `x' = subinstr(`x', char(34), "", .)
		drop `pos'
	}

save data\partial`j'.dta, replace 
}


* Stack the 26 chunk files, partial0 through partial25, into one dataset.
use data\partial0.dta, clear
forvalues chunk = 1/25 {
	append using data\partial`chunk'.dta
}

	* self is 1 when the publisher string contains Amazon, CreateSpace, Lulu
	* or Smashwords (case-sensitive), and 0 otherwise. Presumably a flag for
	* self-published books - confirm.
	gen self = (strpos(publisher,"Amazon") > 0) | (strpos(publisher,"CreateSpace") > 0) | (strpos(publisher,"Lulu") > 0) | (strpos(publisher,"Smashwords") > 0)

	* Convert the three identifier/year strings to numeric. destring leaves a
	* variable untouched when it contains nonnumeric characters.
	destring pubyr author_id book_id, replace

	* Discard everything except the extracted fields and the flag.
	keep pubyr author_id book_id title isbn publisher self


	* Attach the author-level file; many books may share one author_id.
	* Only rows present on both sides are kept and no _merge variable is left.
	merge m:1 author_id using data\author.dta, keep(match) nogenerate

	* Park the book/author rows so the id map can be loaded as the master data.
	tempfile books_authors
	save `books_authors'

	* NOTE(review): this csv is read from the working directory, not from the
	* data folder used for the other inputs - confirm the location.
	import delimited book_id_map.csv, delimiter(comma) clear

	* One-to-one on book_id; again only matched rows survive.
	merge 1:1 book_id using `books_authors', keep(match) nogenerate

	* Remove rows that are identical on every variable.
	duplicates drop

	* Bring in the genre file, which may hold several rows per book_id
	* (1:m merge). Only book_ids found on both sides are kept.
	merge 1:m book_id using intermediate\book_id_genres.dta, keep(match) nogenerate

		* Disabled step, kept for reference: merge of a dblack indicator by author.
		* merge m:1 author_id using author_id_dblack.dta
		* mvencode dblack, mv(0)
		* drop if _merge==2
		* drop _merge

	* Final book-author mapping used downstream.
	save data\book_author_mapping.dta, replace
